Libraries Used

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(here)
## Warning: package 'here' was built under R version 4.1.3
## here() starts at C:/Users/mahri/OneDrive/CodeClan/rshiny_dashboard_project/Work In Progress/Demographics
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggplot2)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(stringr)


Read In Data

hosp_activity_agesex <- read_csv(here("../../raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_age_and_sex.csv"))
## Rows: 129599 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (12): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl  (6): Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, LengthOf...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
hosp_activity_simd <- 
  read_csv(here("../../raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv"))
## Rows: 40894 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl  (7): SIMD, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, Le...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.


Clean Data

hosp_activity_agesex <- janitor::clean_names(hosp_activity_agesex)
# glimpse(hosp_activity_agesex)

hosp_activity_simd <- janitor::clean_names(hosp_activity_simd)
# glimpse(hosp_acivity_simd)


We only need ACUTE patients, what do we have:

admission_type: * Elective Inpatients (not acute) * Emergency Inpatients
* Transfers (not acute)
* All Day cases (not acute)
* All Inpatients (not necasseraly acute)
* All Inpatients and Day cases (not acute)
* Not Specified (not acute???)

hosp_activity_agesex %>% 
  group_by(admission_type) %>% 
  summarise(total = n())
hosp_activity_simd %>% 
  group_by(admission_type) %>% 
  summarise(total = n())


removing those that aren’t acute (see above)

hosp_acute_activity_agesex <- hosp_activity_agesex %>% 
  filter(admission_type == "Emergency Inpatients")

hosp_acute_activity_simd <- hosp_activity_simd %>% 
  filter(admission_type == "Emergency Inpatients")

Seeing what we’re working with:

Age

hosp_acute_activity_agesex %>% 
  group_by(age) %>% 
  summarise(total_patients_in_age_group = n())
# 10 year increments i.e. 0-9, 10-19... 80-89, 90 years and over
# No NA values

hosp_acute_activity_agesex %>% 
  mutate(age = is.na(age)) %>% 
  filter(age == TRUE)

Sex

hosp_acute_activity_agesex %>% 
  group_by(sex) %>% 
  summarise(total_patients_in_sex_group = n())
# Female: 9,832   Male: 9,799 
# No NA values

hosp_acute_activity_agesex %>% 
  mutate(sex = is.na(age)) %>% 
  filter(sex == TRUE)

SIMD

hosp_acute_activity_simd %>% 
  group_by(simd) %>% 
  summarise(total = n())
#There are 962 NAs and ~ 1000 of levels 1:5... remember to tidy this in analysis




Graphs

Remember there were 7 admission types… I chose to keep one.

Might want to make use of this if we’re not sure about admission types: acute_target <- c("Emergency Inpatients", "Not Specified")

Others = Elective Inpatients, Transfers, All Day cases, All Inpatients, All Inpatients and Day cases, Not Specified


Graphs of general admissions and sex

Sex against time and admissions

# do I want to use total length of stay or total average_length_of_stay
# need a total column: 
# sometimes stay = 0, and length of stay = 5

#Male
total_stays_sex_males <- hosp_acute_activity_agesex %>% 
  filter(sex == "Male") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_sex = sum(stays))
total_stays_sex_males
#Female
total_stays_sex_females <- hosp_acute_activity_agesex %>% 
  filter(sex == "Female") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_sex = sum(stays))
total_stays_sex_females

Bind males and females:

hosp_acute_activity_sex_total_stays <- bind_rows(total_stays_sex_females, total_stays_sex_males)
hosp_acute_activity_sex_total_stays

ALSO WANT TO BIND FOR AGE GROUPS

hosp_acute_activity_sex_total_stays %>% 
  group_by(age) %>% 
  summarise(total = n())
# 0-9 years
age_0_9 <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "0-9 years") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_0_9
# 10-19 years
age_10_19 <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "10-19 years") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_10_19
# 20-29 years
age_20_29 <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "20-29 years") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_20_29
# 30-39 years
age_30_39 <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "30-39 years") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_30_39
# 40-49 years
age_40_49 <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "40-49 years") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_40_49
# 20-29 years
age_50_59 <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "50-59 years") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_50_59
# 60-69 years
age_60_69 <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "60-69 years") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_60_69
# 70-79 years
age_70_79 <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "70-79 years") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_70_79
# 80-89 years
age_80_89 <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "80-89 years") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_80_89
# 90 years and over
age_90_plus <- hosp_acute_activity_sex_total_stays %>% 
  filter(age == "90 years and over") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
age_90_plus

Bind these together

hosp_acute_activity_total_stays <- bind_rows(age_0_9, age_10_19, age_20_29, age_30_39, age_40_49, age_50_59, age_60_69, age_70_79, age_80_89, age_90_plus)
hosp_acute_activity_total_stays

Sex and stays

hosp_acute_activity_total_stays %>% 
  ggplot()+
  aes(x = quarter, 
      y = total_stays_per_quarter_sex, 
      group = sex, colour = sex) +
  geom_line() + 
  labs(x = "Yearly Quarter", 
       y = "Total Stays", 
       title = "Total Emergency Inpatient Stays Across Scotland by Sex",
       subtitle = "Q3, 2016 - Q3, 2021", 
       colour = "Sex") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))


AGE GROUPS AND TOTAL STAYS

hosp_acute_activity_total_stays %>% 
  ggplot()+
  aes(x = quarter, 
      y = total_stays_per_quarter_age, 
      group = age, colour = age) +
  geom_point()+
  geom_line() + 
  labs(x = "Yearly Quarter", 
       y = "Total Stays", 
       title = "Total Emergency Inpatient Stays Across Scotland by Age Group",
       subtitle = "Q3, 2016 - Q3, 2021", 
       colour = "Age") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))

SIMD - deprivation level data

SIMD (Scottish Index of Multiple Deprivation)

hosp_acute_activity_simd %>% 
  group_by(simd) %>% 
  summarise(total = n())
#There are 962 NAs and ~ 1000 of levels 1:5... remember to tidy this in analysis


Drop NAs

hosp_acute_activity_simd_no_na <- hosp_acute_activity_simd %>% 
  drop_na(simd)
hosp_acute_activity_simd_no_na
# simd 1
simd1 <- hosp_acute_activity_simd_no_na %>% 
  filter(simd == "1") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_simd = sum(stays))
simd1
# simd 2
simd2 <- hosp_acute_activity_simd_no_na %>% 
  filter(simd == "2") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_simd = sum(stays))
simd2
# simd 3
simd3 <- hosp_acute_activity_simd_no_na %>% 
  filter(simd == "3") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_simd = sum(stays))
simd3
# simd 4
simd4 <- hosp_acute_activity_simd_no_na %>% 
  filter(simd == "4") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_simd = sum(stays))
simd4
# simd 5
simd5 <- hosp_acute_activity_simd_no_na %>% 
  filter(simd == "5") %>% 
  group_by(quarter) %>% 
  mutate(total_stays_per_quarter_simd = sum(stays))
simd5

bind smid

hosp_acute_activity_total_stays_simd <- bind_rows(simd1, simd2, simd3, simd4, 
                                                  simd5)
hosp_acute_activity_total_stays_simd
hosp_acute_activity_total_stays_simd %>%
     mutate(simd = fct_relevel(as.character(simd, 
                                  "1", "2", "3", "4", "5"))) %>%
  ggplot()+
  aes(x = quarter, 
      y = total_stays_per_quarter_simd, 
      group = simd, colour = simd) +
  geom_point()+
  geom_line() + 
  labs(x = "Yearly Quarter", 
       y = "Total Stays", 
       title = "Total Emergency Inpatient Stays Across Scotland by SIMD Level",
       subtitle = "Q3, 2016 - Q3, 2021", 
       colour = "SIMD Level:
       1 = Most Deprived
       5 = Least Deprived") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))

EPISODES

# simd 1
simd1e <- hosp_acute_activity_total_stays_simd %>% 
  filter(simd == "1") %>% 
  group_by(quarter) %>% 
  mutate(total_episodes_per_quarter_simd = sum(episodes))
simd1e
# simd 2
simd2e <- hosp_acute_activity_total_stays_simd %>% 
  filter(simd == "2") %>% 
  group_by(quarter) %>% 
  mutate(total_episodes_per_quarter_simd = sum(episodes))
simd2e
# simd 3
simd3e <- hosp_acute_activity_total_stays_simd %>% 
  filter(simd == "3") %>% 
  group_by(quarter) %>% 
  mutate(total_episodes_per_quarter_simd = sum(episodes))
simd3e
# simd 4
simd4e <- hosp_acute_activity_total_stays_simd %>% 
  filter(simd == "4") %>% 
  group_by(quarter) %>% 
  mutate(total_episodes_per_quarter_simd = sum(episodes))
simd4e
# simd 5
simd5e <- hosp_acute_activity_total_stays_simd %>% 
  filter(simd == "5") %>% 
  group_by(quarter) %>% 
  mutate(total_episodes_per_quarter_simd = sum(episodes))
simd5e
hosp_acute_activity_total_stays_andEps_simd <- bind_rows(simd1e, simd2e, simd3e, 
                                                         simd4e, simd5e)
hosp_acute_activity_total_stays_andEps_simd %>%
     mutate(simd = fct_relevel(as.character(simd, 
                                  "1", "2", "3", "4", "5"))) %>%
  ggplot()+
  aes(x = quarter, 
      y = total_episodes_per_quarter_simd, 
      group = simd, colour = simd) +
  geom_point()+
  geom_line() + 
  labs(x = "Yearly Quarter", 
       y = "Total Stays", 
       title = "Total Emergency Inpatient Episodes Across Scotland by SIMD Level",
       subtitle = "Q3, 2016 - Q3, 2021", 
       colour = "SIMD Level:
       1 = Most Deprived
       5 = Least Deprived") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))